# ---- Load, split and clean the data ----

# Tokens read as missing values: empty strings, literal "NA" and "#DIV/0!".
na_tokens <- c("", "NA", "#DIV/0!")
file <- read.csv("pml-training.csv", stringsAsFactors = FALSE,
                 na.strings = na_tokens)
second_file <- read.csv("pml-testing.csv", stringsAsFactors = FALSE,
                        na.strings = na_tokens)

# Split on `classe`: 80% of the rows stay in `file`, the remaining 20% go
# to `ger`.
# NOTE(review): no set.seed() is visible before this call, so the split is
# not reproducible unless a seed is set earlier in the file -- confirm.
partition <- createDataPartition(file$classe, p = 0.8, list = FALSE)
ger <- file[-partition, ]
file <- file[partition, ]

# Count the NA values in every column of `data` whose name contains
# `pattern` (matched literally). Returns a named integer vector with one
# entry per matching column.
count_missing <- function(data, pattern) {
  sensor_cols <- grep(pattern, names(data), value = TRUE, fixed = TRUE)
  vapply(data[sensor_cols], function(x) sum(is.na(x)), integer(1))
}

# Missing-value counts per sensor-location suffix in the column names.
b_m <- count_missing(file, "_belt")
arm_m <- count_missing(file, "_arm")
f_m <- count_missing(file, "_forearm")
dumb_m <- count_missing(file, "_dumbbell")

# Columns to drop: every matched column that contains at least one NA.
c_d <- c(names(b_m[b_m != 0]),
         names(arm_m[arm_m != 0]),
         names(f_m[f_m != 0]),
         names(dumb_m[dumb_m != 0]))

# Drop the NA columns and the listed bookkeeping columns, then make
# `classe` a factor and every remaining column numeric (cor() is later
# called on all columns except `classe`, so they must all be numeric).
a_n_d <- file %>%
  select(-all_of(c_d),
         -c(X, user_name, raw_timestamp_part_1,
            raw_timestamp_part_2, cvtd_timestamp,
            new_window, num_window)) %>%
  as_tibble() %>%
  mutate(classe = as.factor(classe),
         across(-classe, as.numeric))
# ---- Strongly correlated predictors ----

# Pairwise correlations between all columns except `classe`.
predictor_cor <- cor(select(a_n_d, -classe))
# Zero the diagonal so a variable's correlation with itself is not counted.
diag(predictor_cor) <- 0
# Row/column indices of every pair whose absolute correlation exceeds 0.8.
strong_pairs <- which(abs(predictor_cor) > 0.8, arr.ind = TRUE)
# Names of the variables that appear in at least one such pair.
craasd <- unique(row.names(strong_pairs))

# Plot the correlation matrix restricted to those variables; skipped when
# no pair exceeds the threshold, since there is nothing to plot.
if (length(craasd) > 0) {
  corrplot(cor(select(a_n_d, all_of(craasd))),
           type = "upper", order = "hclust", method = "number")
} else {
  message("No pair of predictors has an absolute correlation above 0.8.")
}

# ---- Correlation funnels per class ----
# Binarize the data (4 bins for numeric columns, infrequency threshold 0.01).
# correlationfunnel website:
# https://business-science.github.io/correlationfunnel/
cfun_data <- a_n_d %>% binarize(n_bins = 4, thresh_infreq = 0.01)

# The same axis limits are used for every funnel plot below.
funnel_limits <- c(-0.5, 0.5)

# For each binarized class column (classe__A ... classe__E): correlate the
# features against that column, then draw the interactive funnel plot.
# The plot calls stay top-level expressions so that they are auto-printed.
correelaation_asd <- cfun_data %>% correlate(target = classe__A)
correelaation_asd %>%
  plot_correlation_funnel(interactive = TRUE, limits = funnel_limits)

correaa_basd <- cfun_data %>% correlate(target = classe__B)
correaa_basd %>%
  plot_correlation_funnel(interactive = TRUE, limits = funnel_limits)

correasde <- cfun_data %>% correlate(target = classe__C)
correasde %>%
  plot_correlation_funnel(interactive = TRUE, limits = funnel_limits)

corritmonos <- cfun_data %>% correlate(target = classe__D)
corritmonos %>%
  plot_correlation_funnel(interactive = TRUE, limits = funnel_limits)

curote_eae <- cfun_data %>% correlate(target = classe__E)
curote_eae %>%
  plot_correlation_funnel(interactive = TRUE, limits = funnel_limits)
# ---- Subsetting a_n_d ----
# Five hand-picked predictor names per class (A to E, in that order).
collumo_asr <- c("magnet_arm_x", "pitch_forearm", "magnet_dumbbell_y",
                 "roll_forearm", "gyros_dumbbell_y")
collumo_bsr <- c("magnet_dumbbell_y", "magnet_dumbbell_x", "roll_dumbbell",
                 "magnet_belt_y", "accel_dumbbell_x")
collumo_csr <- c("magnet_dumbbell_y", "roll_dumbbell", "accel_dumbbell_y",
                 "magnet_dumbbell_x", "magnet_dumbbell_z")
collumo_dsr <- c("pitch_forearm", "magnet_arm_y", "magnet_forearm_x",
                 "accel_dumbbell_y", "accel_forearm_x")
collumo_esr <- c("magnet_belt_y", "magnet_belt_z", "roll_belt",
                 "gyros_belt_z", "magnet_dumbbell_y")

# Union of the five lists, keeping first-appearance order. unique() on the
# concatenation gives the same result as accumulating with union() in a
# loop, without growing a vector or leaving a loop variable named `c`
# behind in the workspace.
fianslasasas <- unique(c(collumo_asr, collumo_bsr, collumo_csr,
                         collumo_dsr, collumo_esr))

# Keep only the selected predictors plus the outcome column.
a_n_d2 <- a_n_d %>% select(all_of(fianslasasas), classe)

# Number of selected predictors per sensor-location suffix.
data.frame("arm" = sum(grepl("_arm", fianslasasas)),
           "forearm" = sum(grepl("_forearm", fianslasasas)),
           "belt" = sum(grepl("_belt", fianslasasas)),
           "dumbbell" = sum(grepl("_dumbbell", fianslasasas)))
## arm forearm belt dumbbell
## 1 2 4 4 7
# Build a density plot: a ggplot of `data` with the given aesthetic
# `mapping`, a density layer drawn with alpha = 0.3, and the "Set2"
# ColorBrewer palette on the fill scale. Extra arguments in `...` are
# forwarded to geom_density().
# NOTE(review): the palette is attached to the fill scale only; if callers
# map just colour (as the ggpairs calls in this file do), it presumably has
# no visible effect -- confirm whether a colour scale was intended.
dineermera <- function(data, mapping, ...) {
  density_layer <- geom_density(..., alpha = 0.3)
  fill_palette <- scale_fill_brewer(palette = "Set2")
  ggplot(data = data, mapping = mapping) + density_layer + fill_palette
}
# Build a scatter plot: a ggplot of `data` with the given aesthetic
# `mapping`, a point layer drawn with alpha = 0.1, and the "Set2"
# ColorBrewer palette on the fill scale. Extra arguments in `...` are
# forwarded to geom_point().
# NOTE(review): the palette is attached to the fill scale only; if callers
# map just colour (as the ggpairs calls in this file do), it presumably has
# no visible effect -- confirm whether a colour scale was intended.
p_dineermera <- function(data, mapping, ...) {
  point_layer <- geom_point(..., alpha = 0.1)
  fill_palette <- scale_fill_brewer(palette = "Set2")
  ggplot(data = data, mapping = mapping) + point_layer + fill_palette
}
# Pairwise plot matrices of the selected predictors, coloured by `classe`,
# drawn in three batches of columns (1-5, 6-10 and 11-17). Lower panels use
# p_dineermera and diagonal panels use dineermera for continuous variables.
pair_lower <- list(continuous = p_dineermera)
pair_diag <- list(continuous = dineermera)

ggpairs(a_n_d2, mapping = aes(color = classe), columns = 1:5,
        lower = pair_lower, diag = pair_diag)

ggpairs(a_n_d2, mapping = aes(color = classe), columns = 6:10,
        lower = pair_lower, diag = pair_diag)

ggpairs(a_n_d2, mapping = aes(color = classe), columns = 11:17,
        lower = pair_lower, diag = pair_diag)
